Avant de commencer

Faire pointer R vers votre répertoire

setwd("le répertoire de mes données")
#knitr::opts_knit$set(root.dir = "~/Documents/GitHub/m1act_Spring21")
datadir <- "~/Documents/GitHub/m1act_Spring21/data"
exportdir <- "~/Documents/GitHub/m1act_Spring21/export"
graphdir <- "~/Documents/GitHub/m1act_Spring21/graph"

(Installer et) charger la librairie faraway

library(faraway)
## Warning: package 'faraway' was built under R version 4.0.2

Export et import d’objets R

aaa=rnorm(100)
bbb=rpois(100,1)
save(aaa,bbb,file=paste(exportdir,"simulations.RData",sep="/"))
rm(aaa,bbb)
try(aaa)
## Error in try(aaa) : objet 'aaa' introuvable
try(bbb)
## Error in try(bbb) : objet 'bbb' introuvable
load(paste(exportdir,"simulations.RData",sep="/"))
aaa
##   [1]  0.53918008  3.35624731  0.32530810  0.30869853  0.45812917 -0.18028935
##   [7]  0.78764959 -0.45366040 -0.70406372  0.09766136 -0.76862059  0.40349567
##  [13]  0.71100150 -0.22341413  0.01130540 -2.31648331  1.16312136  0.66699364
##  [19] -0.15041204 -0.36647171  0.09449877  0.36446214 -0.73098304  1.53584982
##  [25] -0.14314949 -0.37721125 -1.45730393 -0.40967923 -1.35274330 -0.36881981
##  [31]  0.16734731 -1.27853680 -1.77817152  1.01272806 -1.33617097  1.20966020
##  [37]  0.26506061  0.31971658 -2.25323741  1.82210220  1.70244205  1.23380975
##  [43]  0.78877631  0.69810510  0.73055558  0.82137775  0.38815851 -0.66786133
##  [49]  0.53397497 -0.26166113  0.60453192  1.29054167  0.54994984 -1.51491687
##  [55]  0.61199878 -0.24943743 -0.62982501  0.55483869  0.97976751  1.47480814
##  [61]  1.85454583  0.67726936  0.57139608  0.09040689 -1.36899934  1.36310270
##  [67] -0.60291506 -0.84186371 -0.26349584  0.30337803 -0.86720225  0.23917734
##  [73] -1.84514221  0.31440717  0.01304982 -0.07712896  0.03915341  1.20890182
##  [79]  1.27923318  1.70402725  0.18105603  0.77303566  0.85683752 -0.23032459
##  [85]  1.15371667 -0.53067714 -1.76710707  0.26799246  1.94570215  0.81701053
##  [91]  0.85796336  1.38389143 -0.45382472 -0.36096324  1.19460176 -1.03973040
##  [97]  0.78957432 -0.88123257 -0.24367202 -1.04461117
bbb
##   [1] 1 1 1 2 1 1 4 0 0 1 1 1 1 1 1 4 2 1 0 1 1 1 1 1 2 0 5 0 1 0 0 0 1 0 0 0 2
##  [38] 0 1 2 1 2 1 2 2 2 0 1 1 0 1 0 3 0 2 2 1 0 1 0 3 1 1 0 2 3 0 3 3 0 1 0 1 1
##  [75] 0 0 0 0 1 1 1 2 1 1 2 0 2 1 0 2 2 2 1 1 1 4 1 1 0 2

données “vulnerability”

load(paste(datadir,"vulnerability.RData",sep="/"))
#attach(vul)
#country_name
#detach(vul)
with(vul, country_name)
##   [1] Albania                       Algeria                      
##   [3] Angola                        Argentina                    
##   [5] Armenia                       Australia                    
##   [7] Austria                       Azerbaijan                   
##   [9] Bahamas                       Bangladesh                   
##  [11] Belarus                       Belgium                      
##  [13] Belize                        Benin                        
##  [15] Bhutan                        Bolivia                      
##  [17] Bosnia-Hercegovenia           Botswana                     
##  [19] Brazil                        Bulgaria                     
##  [21] Burkina Faso                  Burundi                      
##  [23] Cambodia                      Cameroon                     
##  [25] Canada                        Central African Rep          
##  [27] Chad                          Chile                        
##  [29] China P Rep                   Colombia                     
##  [31] Congo                         Costa Rica                   
##  [33] Cote d'Ivoire                 Croatia                      
##  [35] Cuba                          Czech Rep                    
##  [37] Dominican Rep                 Ecuador                      
##  [39] Egypt                         El Salvador                  
##  [41] Eritrea                       Ethiopia                     
##  [43] Fiji                          France                       
##  [45] Gambia The                    Georgia                      
##  [47] Germany                       Ghana                        
##  [49] Greece                        Grenada                      
##  [51] Guatemala                     Guinea                       
##  [53] Guinea Bissau                 Guyana                       
##  [55] Haiti                         Honduras                     
##  [57] Hong Kong (China)             Hungary                      
##  [59] India                         Indonesia                    
##  [61] Iran Islam Rep                Ireland                      
##  [63] Israel                        Italy                        
##  [65] Jamaica                       Japan                        
##  [67] Jordan                        Kazakhstan                   
##  [69] Kenya                         Korea Rep                    
##  [71] Kyrgyzstan                    Lao P Dem Rep                
##  [73] Lebanon                       Lesotho                      
##  [75] Lithuania                     Macedonia FRY                
##  [77] Madagascar                    Malawi                       
##  [79] Malaysia                      Mali                         
##  [81] Mauritania                    Mauritius                    
##  [83] Mexico                        Moldova Rep                  
##  [85] Mongolia                      Morocco                      
##  [87] Mozambique                    Myanmar                      
##  [89] Namibia                       Nepal                        
##  [91] Netherlands                   New Zealand                  
##  [93] Nicaragua                     Niger                        
##  [95] Nigeria                       Norway                       
##  [97] Oman                          Pakistan                     
##  [99] Panama                        Papua New Guinea             
## [101] Paraguay                      Peru                         
## [103] Philippines                   Poland                       
## [105] Portugal                      Romania                      
## [107] Russia                        Rwanda                       
## [109] Samoa                         Saudi Arabia                 
## [111] Senegal                       Sierra Leone                 
## [113] Slovakia                      South Africa                 
## [115] Spain                         Sri Lanka                    
## [117] St Lucia                      St Vincent and The Grenadines
## [119] Sudan                         Suriname                     
## [121] Swaziland                     Switzerland                  
## [123] Syrian Arab Rep               Tajikistan                   
## [125] Tanzania Uni Rep              Thailand                     
## [127] Timor-Leste                   Togo                         
## [129] Tonga                         Trinidad and Tobago          
## [131] Tunisia                       Turkey                       
## [133] Uganda                        Ukraine                      
## [135] United Kingdom                United States                
## [137] Uruguay                       Vanuatu                      
## [139] Venezuela                     Viet Nam                     
## [141] Yemen                         Zaire/Congo Dem Rep          
## [143] Zambia                        Zimbabwe                     
## 144 Levels: Albania Algeria Angola Argentina Armenia Australia ... Zimbabwe
summary(vul)
##     country_name     ln_urb        ln_events         ln_fert      
##  Albania  :  1   Min.   :2.182   Min.   :0.6931   Min.   :0.3716  
##  Algeria  :  1   1st Qu.:3.421   1st Qu.:2.0794   1st Qu.:0.9507  
##  Angola   :  1   Median :3.911   Median :2.8332   Median :1.4585  
##  Argentina:  1   Mean   :3.773   Mean   :2.7752   Mean   :1.3283  
##  Armenia  :  1   3rd Qu.:4.190   3rd Qu.:3.3758   3rd Qu.:1.7047  
##  Australia:  1   Max.   :4.588   Max.   :5.9480   Max.   :2.0477  
##  (Other)  :138                                                    
##       hdi             ln_pop        ln_death_risk       death_risk       
##  Min.   :0.3350   Min.   : 0.5878   Min.   :-4.3920   Min.   :  0.01238  
##  1st Qu.:0.5321   1st Qu.: 4.2487   1st Qu.:-1.3588   1st Qu.:  0.25697  
##  Median :0.7360   Median : 5.2097   Median :-0.3045   Median :  0.73753  
##  Mean   :0.6887   Mean   : 5.1372   Mean   :-0.1689   Mean   :  5.62025  
##  3rd Qu.:0.8034   3rd Qu.: 6.2394   3rd Qu.: 0.7831   3rd Qu.:  2.18859  
##  Max.   :0.9530   Max.   :10.0206   Max.   : 5.1184   Max.   :167.07002  
## 

scatterplot

with(vul,pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop,  main="Simple Scatterplot Matrix"))

if(!("GGally" %in% rownames(installed.packages()))){install.packages("GGally")}
library(GGally)
## Warning: package 'ggplot2' was built under R version 4.0.2
with(vul, ggpairs(vul[,c("ln_death_risk","ln_events","ln_fert","hdi","ln_pop")]))

Modèle avec 1 covariable

fit_univ = lm(ln_death_risk~ln_events,data=vul)
newdata=data.frame(ln_events=3.4)
pred=predict(fit_univ,newdata,interval="predict")
ic=predict(fit_univ,interval="confidence")
print(pred)
##         fit       lwr      upr
## 1 0.1543123 -3.185642 3.494266
print(ic[1:5,])
##           fit         lwr        upr
## 1 -0.41346753 -0.72116718 -0.1057679
## 2  0.20424358 -0.14006908  0.5485563
## 3 -0.02960412 -0.31691647  0.2577082
## 4  0.27723443 -0.09224715  0.6467160
## 5 -0.88753758 -1.36903423 -0.4060409
if(!("HH" %in% rownames(installed.packages()))){install.packages("HH")}
library('HH')
## Warning: package 'HH' was built under R version 4.0.2
## Warning: package 'multcomp' was built under R version 4.0.2
## Warning: package 'survival' was built under R version 4.0.2
## Warning: package 'TH.data' was built under R version 4.0.2
## Warning: package 'MASS' was built under R version 4.0.2

ci.plot(fit_univ)

Modèle avec les 4 covariables + intercept

fonction lm

fit = lm(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
summary(fit)
## 
## Call:
## lm(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop, 
##     data = vul)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4518 -0.7673 -0.1513  0.5669  6.2271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.3485     1.5175  -3.524 0.000575 ***
## ln_events     1.3708     0.1792   7.649 3.04e-12 ***
## ln_fert       2.1961     0.4614   4.760 4.81e-06 ***
## hdi           1.9922     1.2628   1.578 0.116928    
## ln_pop       -0.5672     0.1026  -5.529 1.54e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-squared:  0.4221, Adjusted R-squared:  0.4055 
## F-statistic: 25.38 on 4 and 139 DF,  p-value: 8.522e-16
anova(fit)
## Analysis of Variance Table
## 
## Response: ln_death_risk
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## ln_events   1  36.775  36.775  20.186 1.466e-05 ***
## ln_fert     1  71.336  71.336  39.156 4.542e-09 ***
## hdi         1  21.154  21.154  11.611 0.0008576 ***
## ln_pop      1  55.703  55.703  30.575 1.540e-07 ***
## Residuals 139 253.237   1.822                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(fit)
## (Intercept)   ln_events     ln_fert         hdi      ln_pop 
##  -5.3484985   1.3708219   2.1960509   1.9921835  -0.5672164
head(residuals(fit))
##          1          2          3          4          5          6 
## -0.4655284  0.1040698 -0.6106572 -0.9839096  0.1853458 -1.9841708

Diagnostics sur X

Matrice de correlations

Definition de la matrice

X = vul[,c("ln_events","ln_fert","hdi","ln_pop")]
all(X==vul[,c(3:6)])
## [1] TRUE
cor_mat = cor(X)
cov_mat = cov(X)

Calcul des valeurs propres et vecteurs propres

propres = eigen(cor_mat)
propres$values
## [1] 1.9963221 1.5984915 0.2715593 0.1336271
propres$values[1] / propres$values
## [1]  1.000000  1.248879  7.351330 14.939503

library(HH,quietly = TRUE)

Variance inflation factors

vif(fit)
## ln_events   ln_fert       hdi    ln_pop 
##  2.421759  3.642415  3.767663  2.460624

Les VIF sont tous inférieurs à 5, donc pas de problème de colinéarité.

Calcul des VIFs directement à partir des formules.

R2.1 = summary(lm(ln_events~ln_fert+hdi+ln_pop, data=vul))$r.squared
(VIF1 <- 1/(1 - R2.1))
## [1] 2.421759
R2.2 = summary(lm(ln_fert~ln_events+hdi+ln_pop, data=vul))$r.squared
(VIF2 <- 1/(1 - R2.2))
## [1] 3.642415
R2.3 = summary(lm(hdi~ln_events+ln_fert+ln_pop, data=vul))$r.squared
(VIF3 <- 1/(1 - R2.3))
## [1] 3.767663
R2.4 = summary(lm(ln_pop~ln_events+ln_fert+hdi, data=vul))$r.squared
(VIF4 <- 1/(1 - R2.4))
## [1] 2.460624

Analyse des résidus

Valeurs ajustées \(\hat y\)

yhat = fitted(fit)
#fit$fitted.values

Résidus \(e = y - \hat y\)

e = residuals(fit)
#fit$residuals

Vérification du calcul des résidus à partir des valeurs ajustées. Utilité de la fonction all.equal par rapport à la comparaison directe avec ==.

with(vul, all(ln_death_risk-yhat==residuals(fit)))
## [1] FALSE
with(vul, all.equal(ln_death_risk-yhat,residuals(fit)))
## [1] TRUE

Residus stabdardisés \(e'\)

e_prime = rstandard(fit)

Residus studentisés \(e^\star\)

e_star = rstudent(fit)
lattice::histogram(e_prime-e_star)

boxplot(e_prime-e_star)

Normalité des erreurs

#plot(fit,which=2)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.94169, p-value = 1.058e-05
qqnorm(e)
qqline(e)

L’asymétrie de la distribution de la racine carré de la valeur absolue des résidus standardisés \(\sqrt{|E|}\) est beaucoup plus faible que celle de la valeur absolue des résidus standardisés \(|E|\) si les \(E\) sont distribuées suivant des lois normales centrées \(N(0,.)\). Utiliser cette racine carrée permet donc d’avoir un indicateur supplémentaire de normalité.

Graphique résidus/valeurs ajustées

plot(fit,which=1)

Graphique scale-location

if(!("lmtest" %in% rownames(installed.packages()))){install.packages("lmtest")}
plot(fit,which=3)

library(lmtest, quietly = TRUE)
## Warning: package 'lmtest' was built under R version 4.0.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
bptest(ln_death_risk~ln_events+ln_fert+hdi+ln_pop+I(ln_events^2)+I(ln_fert^2)+I(hdi^2)+I(ln_pop^2),data = vul)
## 
##  studentized Breusch-Pagan test
## 
## data:  ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop + I(ln_events^2) +     I(ln_fert^2) + I(hdi^2) + I(ln_pop^2)
## BP = 8.3579, df = 8, p-value = 0.3993

##Pas de normalité -> permutations ###Version 1 avec lmPerm

library(lmPerm)
## Warning: package 'lmPerm' was built under R version 4.0.2
fit_p <- lmp(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
## [1] "Settings:  unique SS : numeric variables centered"
summary(fit_p)
## 
## Call:
## lmp(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop, 
##     data = vul)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4518 -0.7673 -0.1513  0.5669  6.2271 
## 
## Coefficients:
##           Estimate Iter Pr(Prob)    
## ln_events   1.3708 5000   <2e-16 ***
## ln_fert     2.1961 5000   <2e-16 ***
## hdi         1.9922 1143   0.0805 .  
## ln_pop     -0.5672 5000   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-Squared: 0.4221,  Adjusted R-squared: 0.4055 
## F-statistic: 25.38 on 4 and 139 DF,  p-value: 8.522e-16

###Version 2 avec pgirmess

library(pgirmess)
## Warning: package 'pgirmess' was built under R version 4.0.2
fit_p2 <- PermTest(fit)
print(fit_p2)
## 
## Monte-Carlo test
## 
## Call: 
## PermTest.lm(obj = fit)
## 
## Based on 1000 replicates
## Simulated p-value:
##           p.value
## ln_events   0.000
## ln_fert     0.000
## hdi         0.002
## ln_pop      0.000

Cas avec hétéroscédasticité :

Données simulées

n =50
X=rnorm(n,1)
epsilon = rnorm(n,0,0.5)
Yok = 2 +3* X+epsilon
lmok = lm(Yok~X)

Yhs = 2 +3* X+ abs(X)^2*epsilon
lmhs = lm(Yhs~X)

Graphiques résidus/ajustées

par(mfrow=c(1,2))
plot(lmok,which=1,main="cas ok")
plot(lmhs,which=1,main="cas heteroscedastique")

#plot(lmnl,which=1,main="cas non lineaire")

Graphiques scale/location

par(mfrow=c(1,2))
plot(lmok,which=3,main="cas ok")
plot(lmhs,which=3,main="cas heteroscedastique")

#plot(lmnl,which=3,main="cas non lineaire")

Observations influentes

Où sont les points “aberrants” ?

pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop,  main="Simple Scatterplot Matrix", data=vul, col=(24+36*as.numeric(abs(e_star)>2)))

Leviers, observations influentes

#Illustrer notion de levier

influences = lm.influence(fit)
hat = influences$hat

Graphique des résidus

pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, data=vul, 
      main="Simple Scatterplot Matrix", 
      col=(24+50*as.numeric(hat>(2*4+2) / nrow(vul))))

Graphique DCook

any((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## [1] FALSE
which((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## integer(0)
par(mfrow=c(1,2))
plot(fit, which=4:5)

Graphique Dbetas

dfbetas=apply(abs(influences$coefficients)/influences$sigma,1,`/`,sqrt(c(144,diag(cov_mat))))
colSums(dfbetas>2/nrow(vul))
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   1   1   2   2   2   2   1   2   4   1   2   2   1   3   1   2   0   2   2 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   2   2   2   1   2   3   1   1   1   0   2   1   1   2   1   2   2   2   3   2 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   3   1   1   1   1   2   1   2   2   4   2   1   3   1   4   4   1   2   1   1 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   1   2   2   0   1   0   4   3   2   2   1   1   3   1   2   2   1   1   2   3 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   1   1   2   3   1   2   2   5   2   2   1   1   3   2   2   2   4   1   2   1 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   1   2   0   2   1   2   2   1   2   2   1   2   2   2   1   1   2   1   1   2 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   2   2   3   2   1   1   3   0   4   1   2   2   1   2   1   2   2   0   4   2 
## 141 142 143 144 
##   0   1   1   0

Relations non-linéaires

Relation non-linéaire due à une covariable

n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)

Ynl = 2 - X[,1] + 3* X[,2]^2 + epsilon
lmnl = lm(Ynl~X[,1]+X[,2])

par(mfrow=c(1,2))
plot(lmnl, which = 1:2)

par(mfrow=c(1,2))
plot(lmnl, which = 3:4)

library(car)
## Warning: package 'car' was built under R version 4.0.2
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
## Attaching package: 'car'
## The following objects are masked from 'package:HH':
## 
##     logit, vif
## The following objects are masked from 'package:faraway':
## 
##     logit, vif

crPlots(lmnl)

Transformation de la variable

X2_2 = X[,2]^2
lmnl_2 = lm(Ynl~X[,1]+X2_2)
crPlots(lmnl_2)

Relation non-linéaire due Y

n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)

ln_Y = 1 - X[,1] + 0.1* X[,2] + epsilon
Y = exp(ln_Y)
lm_ln = lm(Y~X[,1]+X[,2])

par(mfrow=c(1,2))
plot(lm_ln, which = 1:2)

par(mfrow=c(1,2))
plot(lm_ln, which = 3:4)

crPlots(lm_ln)

Transformation de Y

lm_ln_trans = lm(log(Y)~X[,1]+X[,2])
plot(lm_ln_trans)

crPlots(lm_ln_trans)

Interpretation : variables discrètes

?mtcars
mtcars_simple = mtcars[c(1,2,6)]
summary(mtcars_simple)
##       mpg             cyl              wt       
##  Min.   :10.40   Min.   :4.000   Min.   :1.513  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:2.581  
##  Median :19.20   Median :6.000   Median :3.325  
##  Mean   :20.09   Mean   :6.188   Mean   :3.217  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:3.610  
##  Max.   :33.90   Max.   :8.000   Max.   :5.424

library("dplyr")
## Warning: package 'dplyr' was built under R version 4.0.2
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt  <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…

mtcars_simple<- dplyr::mutate(mtcars_simple, cyl = factor(cyl))
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt  <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…

fit_simple = lm(mpg~wt+factor(cyl),data=mtcars_simple)
summary(fit_simple)
## 
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5890 -1.2357 -0.5159  1.3845  5.7915 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.9908     1.8878  18.006  < 2e-16 ***
## wt            -3.2056     0.7539  -4.252 0.000213 ***
## factor(cyl)6  -4.2556     1.3861  -3.070 0.004718 ** 
## factor(cyl)8  -6.0709     1.6523  -3.674 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared:  0.8374, Adjusted R-squared:   0.82 
## F-statistic: 48.08 on 3 and 28 DF,  p-value: 3.594e-11

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
## 
##     layer

ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
  geom_point() 

fit_croise = lm(mpg ~ wt * cyl, data = mtcars_simple)
summary(fit_croise)
## 
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1513 -1.3798 -0.6389  1.4938  5.2523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   39.571      3.194  12.389 2.06e-12 ***
## wt            -5.647      1.359  -4.154 0.000313 ***
## cyl6         -11.162      9.355  -1.193 0.243584    
## cyl8         -15.703      4.839  -3.245 0.003223 ** 
## wt:cyl6        2.867      3.117   0.920 0.366199    
## wt:cyl8        3.455      1.627   2.123 0.043440 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared:  0.8616, Adjusted R-squared:  0.8349 
## F-statistic: 32.36 on 5 and 26 DF,  p-value: 2.258e-10

ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
  geom_point() + 
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mtcars_simple, aes(x=wt, y=mpg)) +
  geom_point() + 
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE, formula = y ~ x)

summary(fit_simple)
## 
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5890 -1.2357 -0.5159  1.3845  5.7915 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.9908     1.8878  18.006  < 2e-16 ***
## wt            -3.2056     0.7539  -4.252 0.000213 ***
## factor(cyl)6  -4.2556     1.3861  -3.070 0.004718 ** 
## factor(cyl)8  -6.0709     1.6523  -3.674 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared:  0.8374, Adjusted R-squared:   0.82 
## F-statistic: 48.08 on 3 and 28 DF,  p-value: 3.594e-11
summary(fit_croise)
## 
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1513 -1.3798 -0.6389  1.4938  5.2523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   39.571      3.194  12.389 2.06e-12 ***
## wt            -5.647      1.359  -4.154 0.000313 ***
## cyl6         -11.162      9.355  -1.193 0.243584    
## cyl8         -15.703      4.839  -3.245 0.003223 ** 
## wt:cyl6        2.867      3.117   0.920 0.366199    
## wt:cyl8        3.455      1.627   2.123 0.043440 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared:  0.8616, Adjusted R-squared:  0.8349 
## F-statistic: 32.36 on 5 and 26 DF,  p-value: 2.258e-10
anova(fit_simple)
## Analysis of Variance Table
## 
## Response: mpg
##             Df Sum Sq Mean Sq  F value    Pr(>F)    
## wt           1 847.73  847.73 129.6650 5.079e-12 ***
## factor(cyl)  2  95.26   47.63   7.2856  0.002835 ** 
## Residuals   28 183.06    6.54                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_croise)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## wt         1 847.73  847.73 141.3883 5.133e-12 ***
## cyl        2  95.26   47.63   7.9443   0.00203 ** 
## wt:cyl     2  27.17   13.58   2.2658   0.12386    
## Residuals 26 155.89    6.00                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_simple,fit_croise)
## Analysis of Variance Table
## 
## Model 1: mpg ~ wt + factor(cyl)
## Model 2: mpg ~ wt * cyl
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     28 183.06                           
## 2     26 155.89  2     27.17 2.2658 0.1239